library(brms)
library(data.table)

# SET WORKING DIRECTORY
setwd("C:/Users/drmil/Dropbox/White House Lobbying/3YP/APSR Final/Replication Archive/visitor_logs_analyses")

clinton_logs_analysis <- fread("clinton_logs_analysis.csv", header = TRUE, stringsAsFactors = FALSE)
obama_logs_analysis <- fread("obama_logs_analysis.csv", header = TRUE, stringsAsFactors = FALSE)

################################################################################

# PAGE 19--SHARE OF INTERESTS IN DATA WITH CFSCORES/IGSCORES

# SUBSETTING TO UNIQUE INTERESTS INCLUDED IN EITHER ANALYSIS

clinton_logs_interests <- subset(clinton_logs_analysis, select = c(crp_orgname,
                                                                   cfscore,
                                                                   igscore))
obama_logs_interests <- subset(obama_logs_analysis, select = c(crp_orgname,
                                                                   cfscore,
                                                                   igscore))
logs_interests <- rbind(clinton_logs_interests, obama_logs_interests)
logs_interests <- logs_interests[!duplicated(logs_interests),]

# HOW MANY/WHAT % OF INTERESTS HAVE CF/IGSCORES?

sum(!is.na(logs_interests$cfscore))
sum(!is.na(logs_interests$cfscore))/dim(logs_interests)[1]
sum(!is.na(logs_interests$igscore))
sum(!is.na(logs_interests$igscore))/dim(logs_interests)[1]

################################################################################

# PAGE 29--PROBABILITIES OF HIGH/LOW-QUALITY WHITE HOUSE ENGAGEMENT TO COMPARE 
# WITH KALLA AND BROOCKMAN (2016)

# LOADING IN THE QUALITY MODELS USED TO MAKE FIGURE 5/TABLE SI.11

load("tableSI11_cols1and2.RData")
load("tableSI11_cols3and4.RData")

# CALCULATING THE PREDICTED PROBABILITIES OF HIGH-QUALITY ENGAGEMENT FOR BOTH
# ADMINISTRATIONS

# NOTE; UNLIKE IN fig3.R AND fig4.R, WE ARE ONLY INTERESTED IN THE CHANGE IN
# PROBABILITY WHEN MOVING FROM NO CONTRIBUTIONS TO THE FIRST QUARTILE OF
# CONTRIBUTIONS (WITH ALL OTHER COVARIATES AT OBSERVED VALUES), SO THE SYNTAX
# RESEMBLES THAT IN THOSE TWO R SCRIPTS BUT GENERATES PREDICTED PROBABILITIES
# FOR ONLY THOSE TWO SCENARIOS

newdata_clinton <- data.frame(any_visits_high_leq0_1l=clinton_logs_analysis$any_visits_high_leq0_1l,
                              any_visits_low_leq0_1l=clinton_logs_analysis$any_visits_low_leq0_1l,
                              lobbyexp_1l=clinton_logs_analysis$lobbyexp_1l,
                              inhouse_lobby=clinton_logs_analysis$inhouse_lobby,
                              any_visits_leq0_1l=clinton_logs_analysis$any_visits_leq0_1l, 
                              industry_pid=clinton_logs_analysis$industry_pid,
                              made_donations_l1tol4=c(rep(0,dim(clinton_logs_analysis)[1]), 
                                                      rep(1,dim(clinton_logs_analysis)[1])), 
                              amt_donations_1ltol4=c(rep(0, dim(clinton_logs_analysis)[1]),
                                                     rep(quantile(clinton_logs_analysis$amt_donations_1ltol4[which(clinton_logs_analysis$made_donations_l1tol4==1)], 
                                                                  probs = c(0.25), 
                                                                  na.rm = TRUE), dim(clinton_logs_analysis)[1])),
                              ACC=clinton_logs_analysis$ACC,
                              ADV=clinton_logs_analysis$ADV,
                              AER=clinton_logs_analysis$AER,
                              AGR=clinton_logs_analysis$AGR,
                              ALC=clinton_logs_analysis$ALC,
                              ANI=clinton_logs_analysis$ANI,
                              APP=clinton_logs_analysis$APP,
                              ART=clinton_logs_analysis$ART,
                              AUT=clinton_logs_analysis$AUT,
                              AVI=clinton_logs_analysis$AVI,
                              BAN=clinton_logs_analysis$BAN,
                              BEV=clinton_logs_analysis$BEV,
                              BNK=clinton_logs_analysis$BNK,
                              BUD=clinton_logs_analysis$BUD,
                              CAW=clinton_logs_analysis$CAW,
                              CDT=clinton_logs_analysis$CDT,
                              CHM=clinton_logs_analysis$CHM,
                              CIV=clinton_logs_analysis$CIV,
                              COM=clinton_logs_analysis$COM,
                              CON=clinton_logs_analysis$CON,
                              CPI=clinton_logs_analysis$CPI,
                              CPT=clinton_logs_analysis$CPT,
                              CSP=clinton_logs_analysis$CSP,
                              DEF=clinton_logs_analysis$DEF,
                              DIS=clinton_logs_analysis$DIS,
                              DOC=clinton_logs_analysis$DOC,
                              ECN=clinton_logs_analysis$ECN,
                              EDU=clinton_logs_analysis$EDU,
                              ENG=clinton_logs_analysis$ENG,
                              ENV=clinton_logs_analysis$ENV,
                              FAM=clinton_logs_analysis$FAM,
                              FIN=clinton_logs_analysis$FIN,
                              FIR=clinton_logs_analysis$FIR,
                              FOO=clinton_logs_analysis$FOO,
                              FOR=clinton_logs_analysis$FOR,
                              FUE=clinton_logs_analysis$FUE,
                              GAM=clinton_logs_analysis$GAM,
                              GOV=clinton_logs_analysis$GOV,
                              HCR=clinton_logs_analysis$HCR,
                              HOU=clinton_logs_analysis$HOU,
                              IMM=clinton_logs_analysis$IMM,
                              IND=clinton_logs_analysis$IND,
                              INS=clinton_logs_analysis$INS,
                              LAW=clinton_logs_analysis$LAW,
                              LBR=clinton_logs_analysis$LBR,
                              MAN=clinton_logs_analysis$MAN,
                              MAR=clinton_logs_analysis$MAR,
                              MED=clinton_logs_analysis$MED,
                              MIA=clinton_logs_analysis$MIA,
                              MMM=clinton_logs_analysis$MMM,
                              MON=clinton_logs_analysis$MON,
                              NAT=clinton_logs_analysis$NAT,
                              PHA=clinton_logs_analysis$PHA,
                              POS=clinton_logs_analysis$POS,
                              REL=clinton_logs_analysis$REL,
                              RES=clinton_logs_analysis$RES,
                              RET=clinton_logs_analysis$RET,
                              ROD=clinton_logs_analysis$ROD,
                              RRR=clinton_logs_analysis$RRR,
                              SCI=clinton_logs_analysis$SCI,
                              SMB=clinton_logs_analysis$SMB,
                              SPO=clinton_logs_analysis$SPO,
                              TAX=clinton_logs_analysis$TAX,
                              TEC=clinton_logs_analysis$TEC,
                              TOB=clinton_logs_analysis$TOB,
                              TOR=clinton_logs_analysis$TOR,
                              TOU=clinton_logs_analysis$TOU,
                              TRA=clinton_logs_analysis$TRA,
                              TRD=clinton_logs_analysis$TRD,
                              TRU=clinton_logs_analysis$TRU,
                              UNM=clinton_logs_analysis$UNM,
                              URB=clinton_logs_analysis$URB,
                              UTI=clinton_logs_analysis$UTI,
                              VET=clinton_logs_analysis$VET,
                              WAS=clinton_logs_analysis$WAS,
                              WEL=clinton_logs_analysis$WEL)

predicted_values_clinton <- as.data.frame(fitted(tableSI11_cols1and2, 
                                                 newdata_clinton, 
                                                 summary = FALSE,
                                                 re_formula = NA))

# IN BLOCKS OF 31673 COLUMNS, THE PREDICTED VALUES ARE GROUPED BY HIGH/LOW
# QUALITY ENGAGEMENT, THEN BY COVARIATE SETTING.  BECAUSE WE ONLY WANT TO KNOW
# ABOUT THE PREDICTED PROBABILITIES FOR HIGH QUALITY ENGAGEMENT, WE TAKE THE
# FIRST TWO GROUPINGS OF 31673

predicted_values_clinton_0contrib_dist <- rowMeans(predicted_values_clinton[,1:31673])
predicted_values_clinton_25contrib_dist <- rowMeans(predicted_values_clinton[,31674:63346])

# PREDICTED PROBABILITIES AT EACH CONTRIBUTION AMOUNT AND 95% CIs

mean(predicted_values_clinton_0contrib_dist)
quantile(predicted_values_clinton_0contrib_dist, probs = c(0.025, 0.975))
mean(predicted_values_clinton_25contrib_dist)
quantile(predicted_values_clinton_25contrib_dist, probs = c(0.025, 0.975))

# DIFFERENCE IN DISTRIBUTIONS OF PREDICTIONS and 95% CI OF DIFFERENCE

mean(predicted_values_clinton_25contrib_dist - predicted_values_clinton_0contrib_dist)
quantile(predicted_values_clinton_25contrib_dist - predicted_values_clinton_0contrib_dist, probs = c(0.025, 0.975))

# CLEAR ENVIRONMENT OF ALL UNNECESSARY OBJECTS BEFORE PROCEEDING TO OBAMA
# ADMINISTRATION (WHICH REQUIRES A LOT OF MEMORY TO CONDUCT)

rm(list= ls()[!(ls() %in% c('obama_logs_analysis',
                            'tableSI11_cols3and4'))])

newdata_obama <- data.frame(any_visits_high_leq0_1l=obama_logs_analysis$any_visits_high_leq0_1l,
                            any_visits_low_leq0_1l=obama_logs_analysis$any_visits_low_leq0_1l,
                            lobbyexp_1l=obama_logs_analysis$lobbyexp_1l,
                            inhouse_lobby=obama_logs_analysis$inhouse_lobby,
                            any_visits_leq0_1l=obama_logs_analysis$any_visits_leq0_1l, 
                            industry_pid=obama_logs_analysis$industry_pid,
                            made_donations_l1tol8=c(rep(0,dim(obama_logs_analysis)[1]), 
                                                    rep(1,dim(obama_logs_analysis)[1])), 
                            amt_donations_1ltol8=c(rep(0, dim(obama_logs_analysis)[1]),
                                                   rep(quantile(obama_logs_analysis$amt_donations_1ltol8[which(obama_logs_analysis$made_donations_l1tol8==1)], 
                                                                probs = c(0.25), 
                                                                na.rm = TRUE), dim(obama_logs_analysis)[1])),
                            ACC=obama_logs_analysis$ACC,
                            ADV=obama_logs_analysis$ADV,
                            AER=obama_logs_analysis$AER,
                            AGR=obama_logs_analysis$AGR,
                            ALC=obama_logs_analysis$ALC,
                            ANI=obama_logs_analysis$ANI,
                            APP=obama_logs_analysis$APP,
                            ART=obama_logs_analysis$ART,
                            AUT=obama_logs_analysis$AUT,
                            AVI=obama_logs_analysis$AVI,
                            BAN=obama_logs_analysis$BAN,
                            BEV=obama_logs_analysis$BEV,
                            BNK=obama_logs_analysis$BNK,
                            BUD=obama_logs_analysis$BUD,
                            CAW=obama_logs_analysis$CAW,
                            CDT=obama_logs_analysis$CDT,
                            CHM=obama_logs_analysis$CHM,
                            CIV=obama_logs_analysis$CIV,
                            COM=obama_logs_analysis$COM,
                            CON=obama_logs_analysis$CON,
                            CPI=obama_logs_analysis$CPI,
                            CPT=obama_logs_analysis$CPT,
                            CSP=obama_logs_analysis$CSP,
                            DEF=obama_logs_analysis$DEF,
                            DIS=obama_logs_analysis$DIS,
                            DOC=obama_logs_analysis$DOC,
                            ECN=obama_logs_analysis$ECN,
                            EDU=obama_logs_analysis$EDU,
                            ENG=obama_logs_analysis$ENG,
                            ENV=obama_logs_analysis$ENV,
                            FAM=obama_logs_analysis$FAM,
                            FIN=obama_logs_analysis$FIN,
                            FIR=obama_logs_analysis$FIR,
                            FOO=obama_logs_analysis$FOO,
                            FOR=obama_logs_analysis$FOR,
                            FUE=obama_logs_analysis$FUE,
                            GAM=obama_logs_analysis$GAM,
                            GOV=obama_logs_analysis$GOV,
                            HCR=obama_logs_analysis$HCR,
                            HOM=obama_logs_analysis$HOM,
                            HOU=obama_logs_analysis$HOU,
                            IMM=obama_logs_analysis$IMM,
                            IND=obama_logs_analysis$IND,
                            INS=obama_logs_analysis$INS,
                            INT=obama_logs_analysis$INT,
                            LAW=obama_logs_analysis$LAW,
                            LBR=obama_logs_analysis$LBR,
                            MAN=obama_logs_analysis$MAN,
                            MAR=obama_logs_analysis$MAR,
                            MED=obama_logs_analysis$MED,
                            MIA=obama_logs_analysis$MIA,
                            MIN=obama_logs_analysis$MIN,
                            MMM=obama_logs_analysis$MMM,
                            MON=obama_logs_analysis$MON,
                            NAT=obama_logs_analysis$NAT,
                            PHA=obama_logs_analysis$PHA,
                            POS=obama_logs_analysis$POS,
                            REL=obama_logs_analysis$REL,
                            RES=obama_logs_analysis$RES,
                            RET=obama_logs_analysis$RET,
                            ROD=obama_logs_analysis$ROD,
                            RRR=obama_logs_analysis$RRR,
                            SCI=obama_logs_analysis$SCI,
                            SMB=obama_logs_analysis$SMB,
                            SPO=obama_logs_analysis$SPO,
                            TAR=obama_logs_analysis$TAR,
                            TAX=obama_logs_analysis$TAX,
                            TEC=obama_logs_analysis$TEC,
                            TOB=obama_logs_analysis$TOB,
                            TOR=obama_logs_analysis$TOR,
                            TOU=obama_logs_analysis$TOU,
                            TRA=obama_logs_analysis$TRA,
                            TRD=obama_logs_analysis$TRD,
                            TRF=obama_logs_analysis$TRF,
                            TRU=obama_logs_analysis$TRU,
                            UNM=obama_logs_analysis$UNM,
                            URB=obama_logs_analysis$URB,
                            UTI=obama_logs_analysis$UTI,
                            VET=obama_logs_analysis$VET,
                            WAS=obama_logs_analysis$WAS,
                            WEL=obama_logs_analysis$WEL)

# DUE TO MEMORY LIMITS, I NEEDED TO CALCULATE THE PREDICTED PROBABILITIES IN
# TWO CHuNKS FOR EACH COVARIATE SETTING

predicted_values_obama_0contrib_pt1 <- as.data.frame(fitted(tableSI11_cols3and4, 
                                                            newdata_obama[1:153383,], 
                                                            summary = FALSE, 
                                                            re_formula = NA))
predicted_values_obama_0contrib_pt2 <- as.data.frame(fitted(tableSI11_cols3and4, 
                                                        newdata_obama[153384:306766,], 
                                                        summary = FALSE, 
                                                        re_formula = NA))

predicted_values_obama_0contrib <- cbind(predicted_values_obama_0contrib_pt1[,1:153383],
                                         predicted_values_obama_0contrib_pt2[,1:153383])

rm(predicted_values_obama_0contrib_pt1, predicted_values_obama_0contrib_pt2)

predicted_values_obama_25contrib_pt1 <- as.data.frame(fitted(tableSI11_cols3and4, 
                                                         newdata_obama[306767:460149,], 
                                                         summary = FALSE, 
                                                         re_formula = NA))
predicted_values_obama_25contrib_pt2 <- as.data.frame(fitted(tableSI11_cols3and4, 
                                                             newdata_obama[460150:613532,], 
                                                             summary = FALSE, 
                                                             re_formula = NA))

predicted_values_obama_25contrib <- cbind(predicted_values_obama_25contrib_pt1[,1:153383],
                                          predicted_values_obama_25contrib_pt2[,1:153383])

rm(predicted_values_obama_25contrib_pt1, predicted_values_obama_25contrib_pt2)

predicted_values_obama_0contrib_dist <- rowMeans(predicted_values_obama_0contrib)
predicted_values_obama_25contrib_dist <- rowMeans(predicted_values_obama_25contrib)

# PREDICTED PROBABILITIES AT EACH CONTRIBUTION AMOUNT AND 95% CIs

mean(predicted_values_obama_0contrib_dist)
quantile(predicted_values_obama_0contrib_dist, probs = c(0.025, 0.975))
mean(predicted_values_obama_25contrib_dist)
quantile(predicted_values_obama_25contrib_dist, probs = c(0.025, 0.975))

# DIFFERENCE IN DISTRIBUTIONS OF PREDICTIONS and 95% CI OF DIFFERENCE

mean(predicted_values_obama_25contrib_dist - predicted_values_obama_0contrib_dist)
quantile(predicted_values_obama_25contrib_dist - predicted_values_obama_0contrib_dist, probs = c(0.025, 0.975))
